####################################
#Diff-in-diff with matching and dual-distance cutoffs - function 
#Traffic and Political Attitudes
#Connor Jerzak & Brian Libgober
####################################

dd_with_dd_BootPreMatch <- function(my_data, #data frame 
                       treatment_basis_name, #character string 
                       control_basis_name, #character vector 
                       outcome_time1_name, #character string 
                       outcome_time2_name, #character string 
                       matching_variables, #character vector
                       treatment_basis_treated_max, #numeric 
                       treatment_basis_control_min, #numeric 
                       control_basis_control_max, #numeric 
                       control_basis_treated_min, #numeric 
                       boostrap_SDs=F, 
                       bootstrap_numb=NULL, 
                       conf_level=0.95, 
                       match_quietly=T, 
                       my_caliper = 0, 
                       start_browser=F, 
                       my_method = "nearest", 
                       my_distance="mahalanobis", 
                       my_discard="none", 
                       my_m.order="largest", 
                       block = F, 
                       my_replace=T) #logical 
{#begin function brackets 
  require(MatchIt, quietly=T)
  
  if(start_browser==T){browser()}
  
  #get treatment basis into memory 
  treatment_basis <- eval(parse(text=paste("my_data$", treatment_basis_name, "")))
  
  #get control basis into memory 
  control_basis <- eval(parse(text=paste("my_data$", control_basis_name, "")))
  
  #determine treated_indicator 
  my_data <- cbind(my_data, NA) 
  colnames(my_data)[ncol(my_data)] <- "treated_indicator"
  my_data$treated_indicator[f2n(treatment_basis) <= treatment_basis_treated_max & 
                              f2n(control_basis) >= control_basis_treated_min] <- 1
  my_data$treated_indicator[f2n(control_basis) <= control_basis_control_max & 
                              f2n(treatment_basis) >= treatment_basis_control_min] <- 0 
  
  #delete variable with is.na(treated_indicator)=T
  my_data <- my_data[is.na(my_data$treated_indicator)==F,]
  remove_columns <- colnames(my_data) %in% c(matching_variables, "County", outcome_time1_name, outcome_time2_name, "treated_indicator")
  my_data <- my_data[,remove_columns]
  
  #create matching formula 
  matching_formula <- paste("treated_indicator ~", paste(matching_variables, collapse="+"))
  
  #create estimation_function 
  match_function <- function(match_input)
  { 
    #create matched dataset
    if(my_method=="genetic")
    {  
      matched_results <- matchit(as.formula(matching_formula), data = match_input, 
                                 method = "genetic", 
                                 distance = my_distance, 
                                 discard = my_discard, 
                                 replace = my_replace)
    }
    
    if(my_method=="nearest")
    { 
      matched_results <- matchit(as.formula(matching_formula), data = match_input, 
                                 method = "nearest", 
                                 distance= my_distance, 
                                 discard= my_discard, 
                                 caliper = my_caliper, 
                                 m.order = my_m.order, 
                                 reestimate = T, 
                                 replace = my_replace)
    }
    else
    { 
      matched_results <- matchit(as.formula(matching_formula), data = match_input, 
                                 method = my_method, 
                                 distance= my_distance, 
                                 discard= my_discard, 
                                 m.order=my_m.order, 
                                 caliper = my_caliper, 
                                 replace=my_replace)
    }
    
    
    #obtain matched dataset 
    matched_dataset <- match.data(matched_results)
    
    return(list(matched_dataset=matched_dataset,
                match_metadata=matched_results))
  }
  
  dd_calculator <- function(input_matched_dataset)
  { 
    mean_treat_time1 <- mean(as.numeric(as.character(
      matched_dataset$weights[input_matched_dataset$treated_indicator==1] *
        input_matched_dataset[input_matched_dataset$treated_indicator==1,which(colnames(input_matched_dataset)==outcome_time1_name)])), 
      na.rm=T)
    mean_treat_time2 <- mean(as.numeric(as.character(
      matched_dataset$weights[input_matched_dataset$treated_indicator==1] * 
        input_matched_dataset[input_matched_dataset$treated_indicator==1,which(colnames(input_matched_dataset)==outcome_time2_name)])), 
      na.rm=T)
    
    mean_control_time1 <- mean(as.numeric(as.character(
      matched_dataset$weights[input_matched_dataset$treated_indicator==0] *          
        input_matched_dataset[input_matched_dataset$treated_indicator==0,which(colnames(input_matched_dataset)==outcome_time1_name)])), 
      na.rm=T)
    mean_control_time2 <- mean(as.numeric(as.character(
      matched_dataset$weights[input_matched_dataset$treated_indicator==0] *    
        input_matched_dataset[input_matched_dataset$treated_indicator==0,which(colnames(input_matched_dataset)==outcome_time2_name)])), 
      na.rm=T)
    
    diff_in_diff_estimate <- (mean_treat_time2 - mean_treat_time1) - (mean_control_time2 - mean_control_time1)
    return(diff_in_diff_estimate)
  }
  #initial investigation of non-matched data 

  boot_est_vec <- c()
  for(iti in 1:(bootstrap_numb+1) ) 
  { 
    if(iti %% 2 == 0) { sprintf("Iteration %s", print(iti)) } 
    if(iti == 1){my_data_i  <- my_data}
    if(iti > 1)
    {
      if(block == T)
      { 
        unique_counties <- names( table( as.character(my_data$County) )  )
        counties_boot <- sample(unique_counties, 
                                length(unique_counties), 
                                replace = T)
        boot_indices <- c()
        for(county_j in 1:length(counties_boot))
        {
          boot_indices <- append(boot_indices, which( my_data$County %in% counties_boot[county_j] )  )
        }
      }
      
      if(block == F)
      { 
      boot_indices <- sample(1:nrow(my_data), nrow(my_data), replace = T)
      } 
      my_data_i  <- my_data[boot_indices,]
    }
    
    my_data_i <- my_data_i[,!colnames(my_data_i) %in% c("County")]
    #create matched dataset 
    match_input <- apply(my_data_i, 2, f2n)
    match_input <- as.data.frame(  na.omit( match_input ) )  
    match_raw <- match_function(match_input=match_input)
    matched_dataset <-  match_raw$matched_dataset
  
    #store metadata 
    match_numb <- nrow(matched_dataset)
    balance_table <- summary(match_raw$match_metadata)
    
    #calculate base result 
    base_result <- dd_calculator(matched_dataset)
    if(iti == 1)
    {
      point_est <- base_result
      match_numb_return <- match_numb
      matched_dataset_return <- matched_dataset
      match_raw_return <- match_raw
      balance_table_return <- balance_table
    }
    if(iti > 1){boot_est_vec[iti-1] <- base_result }
  }
  
  #find the 95% quantiles 
  lower <- quantile(boot_est_vec, (1-conf_level)/2, na.rm=T)
  upper <- quantile(boot_est_vec, (1+conf_level)/2, na.rm=T)
    
  return_list <- list( point_est = point_est, 
                       lower = lower, 
                       upper = upper, 
                       boot_sd = sd(boot_est_vec, na.rm = T), 
                       match_numb = match_numb_return, 
                       matched_dataset = list(matched_dataset_return), 
                       raw_dataset = list(my_data), 
                       match_output = match_raw_return, 
                       balance_table = list( balance_table_return) )
  return(return_list) 
}#end function barkets 
